Import modules


In [ ]:
from twython import TwythonStreamer
import string, json, pprint
import urllib
from datetime import datetime
from datetime import date
from time import *
import string, os, sys, subprocess, time
import psycopg2
import re
from osgeo import ogr

Enter your Twitter API credentials


In [ ]:
# Twitter API credentials.
# They are read from environment variables so that secrets are never stored in
# (or committed with) the notebook. Set TWITTER_APP_KEY, TWITTER_APP_SECRET,
# TWITTER_OAUTH_TOKEN and TWITTER_OAUTH_TOKEN_SECRET before starting the kernel.
# A missing variable yields an empty string, so authentication is expected to
# fail rather than silently use a stale key.
# NOTE(review): any key that was ever pasted into this cell should be treated
# as leaked and regenerated in the Twitter developer console.
APP_KEY = os.environ.get('TWITTER_APP_KEY', '')
APP_SECRET = os.environ.get('TWITTER_APP_SECRET', '')
OAUTH_TOKEN = os.environ.get('TWITTER_OAUTH_TOKEN', '')
OAUTH_TOKEN_SECRET = os.environ.get('TWITTER_OAUTH_TOKEN_SECRET', '')

Set up details for PostGIS DB, run in terminal:

We are going to use a PostGIS database, which requires you to have an empty database. Enter these steps into the terminal to set up your database. In this example we use "demo" as the name of our database. Feel free to give your database another name, but then replace "demo" with the name you have chosen in every step below.

Connect to postgres

psql -d postgres

Create database

postgres=# CREATE DATABASE demo;

Switch to new DB

postgres=# \c demo

Add PostGIS extension to new DB

demo=# create extension postgis;

Add Table

demo=# CREATE TABLE tweets (id serial primary key, tweet_id BIGINT, text varchar(140), date DATE, time TIME, geom geometry(POINT,4326) );

Enter your database connection details:


In [ ]:
# PostGIS connection settings read by insert_into_DB.
# The defaults match the "demo" database created in the setup steps; each one
# can be overridden with an environment variable so that a real password does
# not have to be saved in the notebook.
dbname = os.environ.get("TWEETS_DB_NAME", "demo")
user = os.environ.get("TWEETS_DB_USER", "user")
password = os.environ.get("TWEETS_DB_PASSWORD", "user")
table = os.environ.get("TWEETS_DB_TABLE", "tweets")

Function which connects to the PostGIS database and inserts one tweet


In [ ]:
def insert_into_DB(tweet_id, tweet_text, tweet_date, tweet_time, tweet_lat, tweet_lon,
                   text_is_sql_escaped=True):
    """Insert one tweet as a row with a WGS84 point geometry.

    A new connection is opened with the module-level ``dbname``, ``user`` and
    ``password`` settings for every call, and the row is written to the table
    named by the module-level ``table`` setting.

    Parameters:
        tweet_id: numeric tweet identifier (stored in ``tweet_id``).
        tweet_text: tweet text (stored in ``text``).
        tweet_date: date value or string (stored in ``date``).
        tweet_time: time value or string (stored in ``time``).
        tweet_lat: latitude of the point.
        tweet_lon: longitude of the point.
        text_is_sql_escaped: when True (the default, matching the historical
            contract of this function) ``tweet_text`` is expected to arrive
            with apostrophes already doubled for SQL; the doubling is undone
            before the value is bound. Pass False to store raw text verbatim.

    Database errors are printed, not raised.
    """
    if text_is_sql_escaped:
        # Every apostrophe was doubled by the caller, so collapsing each pair
        # restores the original text exactly; the driver does the real quoting.
        tweet_text = str(tweet_text).replace("''", "'")

    # WKT uses "x y" order, i.e. longitude first.
    wkt_point = "POINT(" + str(tweet_lon) + " " + str(tweet_lat) + ")"

    # The table name comes from local configuration and cannot be a bound
    # parameter; all row values are bound so they can never alter the statement.
    sql = ("INSERT INTO " + str(table) + " (tweet_id, text, date, time, geom) "
           "VALUES (%s, %s, %s, %s, ST_GeomFromText(%s, 4326))")
    params = (tweet_id, tweet_text, str(tweet_date), str(tweet_time), wkt_point)

    conn = None
    try:
        conn = psycopg2.connect(dbname=dbname, user=user, password=password)
        cur = conn.cursor()
        cur.execute(sql, params)
        conn.commit()
    except psycopg2.DatabaseError as e:
        print('Error %s' % e)
    finally:
        # Close even when execute/commit failed, otherwise the connection leaks.
        if conn is not None:
            conn.close()

In [ ]:
def remove_link(text):
    """Cut ``text`` off at its first hyperlink.

    Everything from the first ``http://`` or ``https://`` onwards is dropped,
    including any words that follow the link. Text without a link is returned
    unchanged.

    Parameters:
        text: the tweet text to shorten.

    Returns:
        The part of ``text`` that precedes the first link.
    """
    match = re.search(r'https?://', text)
    if match is None:
        return text
    return text[:match.start()]

Process JSON data from the Twitter stream


In [ ]:
# Class to process JSON data coming from the Twitter streaming API and extract the relevant fields.
class MyStreamer(TwythonStreamer):
    """Streamer that stores every geotagged tweet it receives in the database."""

    def on_success(self, data):
        """Handle one decoded stream message.

        Messages without point coordinates, or without an ``id``, ``text`` or
        ``created_at`` field, are ignored. For the rest, the id, cleaned text,
        creation date/time and longitude/latitude are passed to insert_into_DB.

        Parameters:
            data: the decoded JSON message (a dict).
        """
        geo = data.get('coordinates')
        if geo is None:
            # Not geotagged: nothing to store.
            return

        # Skip messages that lack a field needed for the row instead of failing
        # later on an undefined local.
        if any(key not in data for key in ('id', 'text', 'created_at')):
            return

        # The 'coordinates' array is read as [longitude, latitude].
        lon_lat = geo['coordinates']
        tweet_lon = lon_lat[0]
        tweet_lat = lon_lat[1]

        tweet_id = data['id']

        # insert_into_DB receives the text already SQL-escaped: apostrophes are
        # doubled and semicolons removed. Written for Python 2, where encode()
        # returns a str.
        tweet_text = data['text'].encode('utf-8').replace("'", "''").replace(';', '')
        tweet_text = remove_link(tweet_text)

        tweet_datetime = datetime.strptime(data['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
        # Format explicitly rather than slicing str(datetime), which left a
        # trailing space on the date.
        tweet_date = tweet_datetime.strftime('%Y-%m-%d')
        tweet_time = tweet_datetime.strftime('%H:%M:%S')

        # call function to write to DB
        insert_into_DB(tweet_id, tweet_text, tweet_date, tweet_time, tweet_lat, tweet_lon)

    def on_error(self, status_code, data):
        """Report a stream error by printing its status code.

        The stream is not disconnected here.

        Parameters:
            status_code: the status code reported for the failed request.
            data: the accompanying payload (unused).
        """
        print("OOPS FOUTJE: " + str(status_code))

Main procedure


In [ ]:
def main(locations='-0.351468, 51.38494, 0.148271, 51.672343'):
    """Connect to the Twitter stream and store tweets from a bounding box.

    Parameters:
        locations: bounding box passed to the ``statuses/filter`` endpoint as
            a comma-separated string. See the Twitter API documentation for
            the expected order; the default presumably covers Greater
            London (TODO confirm).

    Errors raised as ValueError while creating the streamer or while
    streaming are printed; nothing is returned.
    """
    try:
        stream = MyStreamer(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
        print('Connecting to twitter: will take a minute')
    except ValueError as e:
        print('OOPS! that hurts, something went wrong while making connection with Twitter: ' + str(e))
        # Without a streamer there is nothing to filter on.
        return

    # Filter based on bounding box see twitter api documentation for more info
    try:
        stream.statuses.filter(locations=locations)
    except ValueError as e:
        print('OOPS! that hurts, something went wrong while getting the stream from Twitter: ' + str(e))


if __name__ == '__main__':
    main()